import pandas as pd
import numpy as np
from datetime import datetime

import warnings
warnings.filterwarnings('ignore')

# strptime pattern for the 'date' column of the input CSV (YYYY-MM-DD).
DATE_FORMAT = '%Y-%m-%d'


def dateparse(x):
    """Parse a single ``YYYY-MM-DD`` string into a ``datetime``.

    No longer used by the loader below; kept so that any other code
    referring to ``dateparse`` keeps working.
    """
    return datetime.strptime(x, DATE_FORMAT)


# Value substituted into the input file name below.
c = 1000

df = pd.read_csv('../data/gpo_final_data/narratives_complete_with_metadata_manual_labels_rich_{0}.csv'.format(c))

# Convert the date column after loading instead of passing `date_parser` to
# read_csv: that argument is deprecated since pandas 2.0, whereas
# pd.to_datetime with an explicit format works on old and new versions and
# is vectorised rather than calling strptime once per row.
df['date'] = pd.to_datetime(df['date'], format=DATE_FORMAT)

# Add year field
df['year'] = df['date'].dt.year

# Replace zero and +/-inf values of 'or' with NaN.
# Assign the result back instead of calling replace(..., inplace=True) on
# the column selection: the in-place form mutates an intermediate object,
# which triggers a FutureWarning on recent pandas and leaves `df` unchanged
# when Copy-on-Write is enabled.
df['or'] = df['or'].replace([np.inf, -np.inf, 0], np.nan)

# Get top partisan narratives.
# Select 'or' *before* aggregating: calling .mean() on the whole grouped
# frame averages every column and raises TypeError on pandas >= 2.0 when
# non-numeric columns are present.
mean_or_by_narrative = df.groupby('narrative')['or'].mean()

# The 10 narratives with the largest mean 'or', ordered by ascending mean.
rep_narratives = list(mean_or_by_narrative.nlargest(10).sort_values(ascending=True).index)
# The 10 narratives with the smallest mean 'or', smallest first.
dem_narratives = list(mean_or_by_narrative.nsmallest(10).index)
narratives_to_plot = rep_narratives + dem_narratives

cleaned_df = df[df['narrative'].isin(narratives_to_plot)]
# Keep one row (the first occurrence) per narrative.
unique_df = cleaned_df.drop_duplicates(subset=['narrative'])

# Emit the rows in the order of `narratives_to_plot`.
# DataFrame.append was removed in pandas 2.0, so collect the per-narrative
# slices and concatenate them once.
output_columns = ['narrative', 'log_or']
ordered_rows = [
    unique_df.loc[unique_df['narrative'] == narrative, output_columns]
    for narrative in narratives_to_plot
]
if ordered_rows:
    unique_df_sorted = pd.concat(ordered_rows, ignore_index=True)
else:
    # pd.concat rejects an empty list; still write a header-only CSV.
    unique_df_sorted = pd.DataFrame(columns=output_columns)

unique_df_sorted.to_csv('../data/temp/partisan_narratives.csv', index=False)
